import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.StringUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Small command-line scraper that downloads the Baidu "buzz" ranking page,
 * extracts up to {@link #MAX_ENTRIES} rows (title, link and "hots" text) into
 * {@code NewsEntity} objects, and prints the resulting list and its size.
 */
public class MySpider {
    /** Address of the Baidu ranking page that is scraped. */
    private static final String BUZZ_URL = "http://top.baidu.com/buzz?b=1&fr=tph_right";

    /** Browser-like User-Agent header sent with the request (identifies as MSIE 9.0). */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";

    /** Maximum number of entries collected from the page. */
    private static final int MAX_ENTRIES = 10;

    /** Message printed when the page cannot be fetched or lacks the expected root element. */
    private static final String FAILURE_MESSAGE = "网页元素发生改变或访问被禁止";

    /**
     * Fetches the ranking page and prints the extracted entries followed by their count.
     * If the request fails, or the page has no element with id {@code main}, the
     * failure message is printed instead.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Connection connect = Jsoup.connect(BUZZ_URL);
        connect.userAgent(USER_AGENT);
        try {
            // Perform the GET request and parse the response into a document.
            Document document = connect.get();
            // Root element of the region to scrape; null when the page has no such id.
            Element main = document.getElementById("main");
            if (main == null) {
                System.out.println(FAILURE_MESSAGE);
                return;
            }
            List<NewsEntity> list = extractEntries(main);
            System.out.println(list.toString());
            System.out.println(list.size());
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println(FAILURE_MESSAGE);
        }
    }

    /**
     * Collects at most {@link #MAX_ENTRIES} entries from the table rows found under
     * {@code root}. Rows whose link, title or "hots" text is empty are skipped and
     * do not count towards the limit.
     *
     * @param root element under which the ranking table is searched
     * @return the extracted entries in document order; never {@code null}
     */
    private static List<NewsEntity> extractEntries(Element root) {
        List<NewsEntity> entries = new ArrayList<NewsEntity>();
        Elements rows = root.select("div[class=mainBody]").select("table[class=list-table]")
                .select("tbody").select("tr");
        for (Element row : rows) {
            if (entries.size() >= MAX_ENTRIES) {
                break;
            }
            // The title anchor carries both the link (href) and the visible title text.
            Elements titleLink = row.select("td[class=keyword]").select("a[class=list-title]");
            String href = titleLink.attr("href");
            String title = titleLink.text();
            String hots = row.select("td[class=last]").select("span").text();
            if (!StringUtils.hasLength(href) || !StringUtils.hasLength(title)
                    || !StringUtils.hasLength(hots)) {
                continue;
            }
            NewsEntity entity = new NewsEntity();
            entity.setTitle(title);
            entity.setUrl(href);
            entity.setHots(hots);
            entries.add(entity);
        }
        return entries;
    }
}
